#' Caterpillar plot with thick and thin CI
#'
#' Caterpillar plots are plotted combining all chains for each parameter.
#'
#' @param D Data frame whith the simulations or list of data frame with simulations. If a list of data frames with simulations is passed, the names of the models are the names of the objects in the list.
#' @param X data frame with two columns, Parameter and the value for the x location. Parameter must be a character vector with the same names that the parameters in the D object. 
#' @param family Name of the family of parameters to plot, as given by a character vector or a regular expression. A family of parameters is considered to be any group of parameters with the same name but different numerical value between square brackets (as beta[1], beta[2], etc). 
#' @param thick_ci Vector of length 2 with the quantiles of the thick band for the credible interval
#' @param thin_ci Vector of length 2 with the quantiles of the thin band for the credible interval
#' @param line Numerical value indicating a concrete position, usually used to mark where zero is. By default do not plot any line.
#' @param horizontal Logical. When TRUE (the default), the plot has horizontal lines. When FALSE, the plot is reversed to show vertical lines. Horizontal lines are more appropriate for categorical caterpillar plots, because the x-axis is the only dimension that matters. But for caterpillar plots against another variable, the vertical position is more appropriate.
#' @param model_labels Vector of strings that matches the number of models in the list. It is only used in case of multiple models and when the list of ggs objects given at \code{D} is not named. Otherwise, the names in the list are used.
#' @param param_label_from Vector of strings (can be regular expressions) for the original paramater labels that you would like to replace in the plot lables using \code{param_label_to}. 
#' @param param_label_to Vector of strings for paramater labels you would like to use. Must be both the same length as \code{param_label_from} and in the same order.
#' @param order Logical whether or not to order the parameters by their medians.
#' @return A \code{ggplot} object.
#'
#' @importFrom plyr ddply
#' @importFrom reshape2 dcast
#' @importFrom ggplot2 ggplot
#' @export
#' @examples
#' data(samples)
#' ggs_caterpillar(ggs(S))
#' ggs_caterpillar(list(A=ggs(S), B=ggs(S))) # silly example duplicating the same model


ggs_caterpillar_label <- function(D, family=NA, X=NA, 
                                  thick_ci=c(0.05, 0.95), thin_ci=c(0.025, 0.975),
                                  line=NA, horizontal=TRUE, model_labels=NULL,
                                  param_label_from = NULL, param_label_to = NULL,
                                  order = TRUE) {
  ### Remove in package version
  require(plyr)
  require(reshape2)
  require(ggplot2)
  
  # Manage subsetting a family of parameters
  if (!is.na(family)) {
    D <- ggmcmc:::get_family(D, family=family)
  }
  # Manage adding a X dataframe, and check potential errors in X
  x.present <- FALSE
  if (class(X)=="data.frame") {
    # check number of observations
    if (dim(X)[1] != attributes(D)$nParameters) {
      stop("X has a different number of observations than the number of parameters to plot.")
    }
    # get the name of the numerical variable
    x.name <- names(X)[which(names(X)!="Parameter")]
    # check that the x is numerical
    if (!is.numeric(X[,which(names(X)==x.name)])) {
      stop("X has a non numeric variable to plot against. The variable different than 'Parameter' must be numeric.")
    }
    # Set x present to distinguish between plots against x or without
    x.present <- TRUE
  }
  
  # Passing the "probs" argument to quantile inside ddply inside a function
  # turns out to be really hard.
  # Apparently there is a bug in ddply that even Hadley does not know how to
  # solve
  # http://stackoverflow.com/questions/6955128/object-not-found-error-with-ddply-inside-a-function
  # One of the solutions, not elegant, is to assign qs globally (as well as
  # locally  for further commands in this function
  qs  <- qs <<- c(thin.low=thin_ci[1], thick.low=thick_ci[1], 
                  median=0.5, thick.high=thick_ci[2], thin.high=thin_ci[2])
  
  # Multiple models or a single model
  #
  if (!is.data.frame(D)) { # D is a list, and so multiple models are passed
    multi <- TRUE # used later in plot call
    for (i in 1:length(D)) { # iterate over list elements
      dc <- ddply(D[[i]], .(Parameter), summarize,
                  q=quantile(value, probs=qs), qs=qs)
      dc$qs <- factor(dc$qs, labels=names(qs))
      dcm <- dcast(dc, Parameter ~ qs, value.var="q")
      D[[i]] <- dcm # replace list element with transformed list element
    }
    #
    # Get model names, by default the description attribute of the ggs object
    model.names <- lapply(D, function(x) return(attributes(x)$description))
    # But prevalence is for the names of the named list, not for labels or for the description
    if (length(model_labels)==length(D)) model.names <- model_labels       # get model names from labels
    if (length(names(D)!=0)) model.names <- names(D)           # get model names from named list
    # Final data frame to use for plotting
    dcm <- do.call(
      rbind, 
      lapply(1:length(D), 
             function(i) if (length(D[[i]]) > 1) cbind(D[[i]], Model=model.names[i])))
    
  } else if (is.data.frame(D)) { # D is a data frame, and so a single model is passed
    multi <-  FALSE
    dc <- ddply(D, .(Parameter), summarize, 
                q=quantile(value, probs=qs), qs=qs,
                .parallel=attributes(D)$parallel)
    dc$qs <- factor(dc$qs, labels=names(qs))
    dcm <- dcast(dc, Parameter ~ qs, value.var="q")
  }
  
  # Relabel plotted parameters
  if (!is.null(param_label_from) & !is.null(param_label_to)){
    if (length(param_label_from) != length(param_label_to)){
      stop('param_label_from must be the same length as param_label_to', .call = FALSE)
    }
    for (i in 1:length(param_label_from)){
      dcm[, 'Parameter'] <- gsub(pattern = param_label_from[i], 
                                 replacement = param_label_to[i], dcm[, 'Parameter'])  
    }
  }
  
  #
  # Plot, depending on continuous or categorical x
  #
  if (!x.present) {
    if (isTRUE(order)){
      f <- ggplot(dcm, aes(x=median, y=reorder(Parameter, median))) + geom_point(size=3) +
        geom_segment(aes(x=thick.low, xend=thick.high, yend=reorder(Parameter, median)), size=1.5) +
        geom_segment(aes(x=thin.low, xend=thin.high, yend=reorder(Parameter, median)), size=0.5) +
        xlab("HPD") + ylab("Parameter")
    } else {
      f <- ggplot(dcm, aes(x=median, y=Parameter)) + geom_point(size=3) +
        geom_segment(aes(x=thick.low, xend=thick.high, yend=reorder(Parameter, median)), size=1.5) +
        geom_segment(aes(x=thin.low, xend=thin.high, yend=reorder(Parameter, median)), size=0.5) +
        xlab("HPD") + ylab("Parameter")      
    }
  } else {
    dcm <- merge(dcm, X)
    f <- ggplot(dcm, aes_string(x="median", y=x.name)) + geom_point(size=3) +
      geom_segment(aes_string(x="thick.low", xend="thick.high", yend=x.name), size=1.5) +
      geom_segment(aes_string(x="thin.low", xend="thin.high", yend=x.name), size=0.5) +
      xlab("HPD") + ylab(x.name)
  }
  
  # Manage horizontal or vertical plot
  if (horizontal == FALSE) {
    f <- f + coord_flip() 
  }
  
  # Manage multiple models
  if (multi==TRUE & horizontal==TRUE) 
    f <- f + facet_grid(Model ~ ., scales="free", space="free")
  if (multi==TRUE & horizontal==FALSE)
    f <- f + facet_grid(. ~ Model, scales="free", space="free")
  
  # Manage axis labels
  if (!x.present & horizontal==FALSE) {
    f <- f + theme(legend.position="none", axis.text.x=element_text(size=7, hjust=1, angle=90))
  } else {
    f <- f + theme(legend.position="none", axis.text.x=element_text(size=7, hjust=1))
  } 
  
  # Add a line to remark a specific point
  if (!is.na(line)) {
    f <- f + geom_vline(xintercept=line, linetype="dashed")
  }
  return(f)
}